We will now load the CORD-19 .csv of our found tokens and their corresponding frequencies.
# COVID-19 average cases and deaths by state and county in week leading up to 2020 election
data_news <- read.csv("all-the-news-2-1-table.csv")
data_news$year.cont <- data_news$year + ((data_news$month - 1) / 12) + data_news$day / 365
data_news
#data_language$freq <- as.numeric(data_language$freq)
Let’s check the correlation of pos, neg, neu sentiments between title and article.
cor(data_news[4:9])
## title.neg.sent title.neu.sent title.pos.sent article.neg.sent
## title.neg.sent 1.0000000 -0.742735437368 -0.1889036 0.4791085
## title.neu.sent -0.7427354 1.000000000000 -0.5172237 -0.3463854
## title.pos.sent -0.1889036 -0.517223674406 1.0000000 -0.1043972
## article.neg.sent 0.4791085 -0.346385438075 -0.1043972 1.0000000
## article.neu.sent -0.1058188 0.188143974822 -0.1406767 -0.2568014
## article.pos.sent -0.2046657 -0.000002852297 0.2616142 -0.2259479
## article.neu.sent article.pos.sent
## title.neg.sent -0.1058188 -0.204665746568
## title.neu.sent 0.1881440 -0.000002852297
## title.pos.sent -0.1406767 0.261614248961
## article.neg.sent -0.2568014 -0.225947897792
## article.neu.sent 1.0000000 -0.121816184302
## article.pos.sent -0.1218162 1.000000000000
Compute weighted averages on sentiment
# compute weighted average of title sentiment (with direction)
data_news$title.sent <- ifelse((data_news$title.pos.sent + data_news$title.neg.sent + data_news$title.neu.sent) == 0, 0, (data_news$title.pos.sent - data_news$title.neg.sent) / (data_news$title.pos.sent + data_news$title.neg.sent + data_news$title.neu.sent))
# compute weighted average of article sentiment (with direction)
data_news$article.sent <- ifelse((data_news$article.pos.sent + data_news$article.neg.sent + data_news$article.neu.sent) == 0, 0, (data_news$article.pos.sent - data_news$article.neg.sent) / (data_news$article.pos.sent + data_news$article.neg.sent + data_news$article.neu.sent))
# remove all the exact zero-weight sentiment (could be due to N/A content)
data_news <- data_news %>% filter(!(title.sent == 0 && article.sent == 0))
Now, we check correlations between weighted title and article sentiment.
cor(data_news[13:14])
## year.cont title.sent
## year.cont 1.00000000 0.05265997
## title.sent 0.05265997 1.00000000
# build chart
data_news %>% plot_ly(x = ~article.sent, y = ~title.sent,
type = "scatter") %>%
layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(-1,1)))
Compute average of title and content.
# compute weighted average of title sentiment (with direction)
data_news$sent <- data_news$title.sent + data_news$article.sent
Let’s check our new synthetic sentiment measure.
# build chart
data_news %>% plot_ly(x = ~sent,
type = "histogram") %>%
layout(xaxis = list(range=c(-1,1)))
Display a timeline of sentiments, based on this new unidimensional sentiment model.
# build chart
data_news %>% plot_ly(x = ~sent, y = ~year, color=~publication, type = "scatter",
text = paste0("<b>", data_news$publication,
"</b><br><br>Article: <i>",
data_news$title,
"</i><br>URL: <a href='",
data_news$url, "'>",
substr(data_news$url, 0, 30),
"...</a>"),
hovertemplate = "%{text}") %>%
layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(2015,2021)))
# build chart
fig_cnn <- plot_ly(data_news %>% filter(publication == "CNN"),
x = ~sent, y = ~year, name = "CNN", type = "scatter")
fig_cnn <- fig_cnn %>% layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(2015,2021)))
fig_nyt <- plot_ly(data_news %>% filter(publication == "The New York Times"),
x = ~sent, y = ~year, name = "The New York Times", type = "scatter")
fig_nyt <- fig_nyt %>% layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(2015,2021)))
fig_fox <- plot_ly(data_news %>% filter(publication == "Fox News"),
x = ~sent, y = ~year, name = "Fox News", type = "scatter")
fig_fox <- fig_fox %>% layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(2015,2021)))
plotly::subplot(fig_cnn, fig_nyt, fig_fox)
rm(fig_cnn)
rm(fig_nyt)
rm(fig_fox)
# build chart
fig <- plot_ly()
fig <- fig %>% add_trace(data = data_news %>% filter(year == 2016), x = ~sent, name = "2016", type = "violin")
fig <- fig %>% add_trace(data = data_news %>% filter(year == 2017), x = ~sent, name = "2017", type = "violin")
fig <- fig %>% add_trace(data = data_news %>% filter(year == 2018), x = ~sent, name = "2018", type = "violin")
fig <- fig %>% add_trace(data = data_news %>% filter(year == 2019), x = ~sent, name = "2019", type = "violin")
fig <- fig %>% add_trace(data = data_news %>% filter(year == 2020), x = ~sent, name = "2020", type = "violin")
fig <- fig %>% add_trace(data = data_news, x = ~sent, y = ~year, yaxis="y2", name = "Year") %>%
layout(yaxis2 = list(overlaying = "y", side = "right"))
fig
rm(fig)
# build chart
data_news$sent.transform <- ((data_news$sent)/(sqrt((data_news$sent)^2))) * abs(data_news$sent)^(1/5)
data_news %>% plot_ly(x = ~year.cont, y = ~title.neg.sent, color=~publication, type = "scatter") %>%
layout(yaxis = list(range=c(-.1, 0.6)))
# layout(xaxis = list(range=c(-1,1)), yaxis = list(range=c(2015,2021)))
# build sample data for display
data_news_sample <- data_news[sample(1:nrow(data_news), size = 100,
prob = (data_news$sent)^2, replace = F), ]
d_n_s_fox <- data_news_sample %>% filter(publication == "Fox News")
d_n_s_nyt <- data_news_sample %>% filter(publication == "The New York Times")
d_n_s_cnn <- data_news_sample %>% filter(publication == "CNN")
# build chart
fig <- plot_ly() %>%
layout(title = list(text = "Popular Media Emotion Between 2016 and 2020"))
fig <- fig %>%
add_trace(data = d_n_s_fox, color = I("#002885"),
x = ~sent, y = ~year.cont, yaxis = "y2", name = ~publication,
text = paste0("<b>", d_n_s_fox$publication,
"</b><br><br>Article: <i>",
d_n_s_fox$title,
"</i><br>URL: <a href='",
d_n_s_fox$url, "'>",
substr(d_n_s_fox$url, 0, 30),
"...</a>"),
hovertemplate = "%{text}") %>%
add_trace(data = d_n_s_nyt, color = I("#555555"),
x = ~sent, y = ~year.cont, yaxis = "y2", name = ~publication,
text = paste0("<b>", d_n_s_nyt$publication,
"</b><br><br>Article: <i>",
d_n_s_nyt$title,
"</i><br>URL: <a href='",
d_n_s_nyt$url, "'>",
substr(d_n_s_nyt$url, 0, 30),
"...</a>"),
hovertemplate = "%{text}") %>%
add_trace(data = d_n_s_cnn, color = I("#EC2029"),
x = ~sent, y = ~year.cont, yaxis = "y2", name = ~publication,
text = paste0("<b>", d_n_s_cnn$publication,
"</b><br><br>Article: <i>",
d_n_s_cnn$title,
"</i><br>URL: <a href='",
d_n_s_cnn$url, "'>",
substr(d_n_s_cnn$url, 0, 30),
"...</a>"),
hovertemplate = "%{text}") %>%
layout(
xaxis = list(title = "Article Emotion", tickmode = "array",
nticks = 2, tickvals = c(-1, 1),
ticktext = c("More Negative", "More Positive")),
xaxis2 = list(title = "Left"),
yaxis = list(title = "News Source"),
yaxis2 = list(title = "Year", overlaying = "y", side = "right", autotick = F, dtick = 1),
legend = list(x = 1.1)
)
fig <- fig %>% add_trace(data = data_news %>% filter(publication == "Fox News"), color = I("#002885"),
x = ~sent, name = "Fox", type = "violin", hoveron="points", box = list(visible = T))
fig <- fig %>% add_trace(data = data_news %>% filter(publication == "The New York Times"), color = I("#555555"),
x = ~sent, name = "NYT", type = "violin", hoveron="points", box = list(visible = T))
fig <- fig %>% add_trace(data = data_news %>% filter(publication == "CNN"), color = I("#EC2029"),
x = ~sent, name = "CNN", type = "violin", hoveron="points", box = list(visible = T))
fig
rm(fig)